LLVM 19.0.0git
AArch64PostLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== AArch64PostLegalizerCombiner.cpp --------------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// Post-legalization combines on generic MachineInstrs.
11///
12/// The combines here must preserve instruction legality.
13///
14/// Lowering combines (e.g. pseudo matching) should be handled by
15/// AArch64PostLegalizerLowering.
16///
17/// Combines which don't rely on instruction legality should go in the
18/// AArch64PreLegalizerCombiner.
19///
20//===----------------------------------------------------------------------===//
21
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/Support/Debug.h"
42
43#define GET_GICOMBINER_DEPS
44#include "AArch64GenPostLegalizeGICombiner.inc"
45#undef GET_GICOMBINER_DEPS
46
47#define DEBUG_TYPE "aarch64-postlegalizer-combiner"
48
49using namespace llvm;
50using namespace MIPatternMatch;
51
52namespace {
53
54#define GET_GICOMBINER_TYPES
55#include "AArch64GenPostLegalizeGICombiner.inc"
56#undef GET_GICOMBINER_TYPES
57
58/// This combine tries do what performExtractVectorEltCombine does in SDAG.
59/// Rewrite for pairwise fadd pattern
60/// (s32 (g_extract_vector_elt
61/// (g_fadd (vXs32 Other)
62/// (g_vector_shuffle (vXs32 Other) undef <1,X,...> )) 0))
63/// ->
64/// (s32 (g_fadd (g_extract_vector_elt (vXs32 Other) 0)
65/// (g_extract_vector_elt (vXs32 Other) 1))
66bool matchExtractVecEltPairwiseAdd(
68 std::tuple<unsigned, LLT, Register> &MatchInfo) {
69 Register Src1 = MI.getOperand(1).getReg();
70 Register Src2 = MI.getOperand(2).getReg();
71 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
72
74 if (!Cst || Cst->Value != 0)
75 return false;
76 // SDAG also checks for FullFP16, but this looks to be beneficial anyway.
77
78 // Now check for an fadd operation. TODO: expand this for integer add?
79 auto *FAddMI = getOpcodeDef(TargetOpcode::G_FADD, Src1, MRI);
80 if (!FAddMI)
81 return false;
82
83 // If we add support for integer add, must restrict these types to just s64.
84 unsigned DstSize = DstTy.getSizeInBits();
85 if (DstSize != 16 && DstSize != 32 && DstSize != 64)
86 return false;
87
88 Register Src1Op1 = FAddMI->getOperand(1).getReg();
89 Register Src1Op2 = FAddMI->getOperand(2).getReg();
90 MachineInstr *Shuffle =
91 getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op2, MRI);
92 MachineInstr *Other = MRI.getVRegDef(Src1Op1);
93 if (!Shuffle) {
94 Shuffle = getOpcodeDef(TargetOpcode::G_SHUFFLE_VECTOR, Src1Op1, MRI);
95 Other = MRI.getVRegDef(Src1Op2);
96 }
97
98 // We're looking for a shuffle that moves the second element to index 0.
99 if (Shuffle && Shuffle->getOperand(3).getShuffleMask()[0] == 1 &&
100 Other == MRI.getVRegDef(Shuffle->getOperand(1).getReg())) {
101 std::get<0>(MatchInfo) = TargetOpcode::G_FADD;
102 std::get<1>(MatchInfo) = DstTy;
103 std::get<2>(MatchInfo) = Other->getOperand(0).getReg();
104 return true;
105 }
106 return false;
107}
108
109void applyExtractVecEltPairwiseAdd(
111 std::tuple<unsigned, LLT, Register> &MatchInfo) {
112 unsigned Opc = std::get<0>(MatchInfo);
113 assert(Opc == TargetOpcode::G_FADD && "Unexpected opcode!");
114 // We want to generate two extracts of elements 0 and 1, and add them.
115 LLT Ty = std::get<1>(MatchInfo);
116 Register Src = std::get<2>(MatchInfo);
117 LLT s64 = LLT::scalar(64);
118 B.setInstrAndDebugLoc(MI);
119 auto Elt0 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 0));
120 auto Elt1 = B.buildExtractVectorElement(Ty, Src, B.buildConstant(s64, 1));
121 B.buildInstr(Opc, {MI.getOperand(0).getReg()}, {Elt0, Elt1});
122 MI.eraseFromParent();
123}
124
126 // TODO: check if extended build vector as well.
127 unsigned Opc = MRI.getVRegDef(R)->getOpcode();
128 return Opc == TargetOpcode::G_SEXT || Opc == TargetOpcode::G_SEXT_INREG;
129}
130
132 // TODO: check if extended build vector as well.
133 return MRI.getVRegDef(R)->getOpcode() == TargetOpcode::G_ZEXT;
134}
135
136bool matchAArch64MulConstCombine(
138 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
139 assert(MI.getOpcode() == TargetOpcode::G_MUL);
140 Register LHS = MI.getOperand(1).getReg();
141 Register RHS = MI.getOperand(2).getReg();
142 Register Dst = MI.getOperand(0).getReg();
143 const LLT Ty = MRI.getType(LHS);
144
145 // The below optimizations require a constant RHS.
147 if (!Const)
148 return false;
149
150 APInt ConstValue = Const->Value.sext(Ty.getSizeInBits());
151 // The following code is ported from AArch64ISelLowering.
152 // Multiplication of a power of two plus/minus one can be done more
153 // cheaply as shift+add/sub. For now, this is true unilaterally. If
154 // future CPUs have a cheaper MADD instruction, this may need to be
155 // gated on a subtarget feature. For Cyclone, 32-bit MADD is 4 cycles and
156 // 64-bit is 5 cycles, so this is always a win.
157 // More aggressively, some multiplications N0 * C can be lowered to
158 // shift+add+shift if the constant C = A * B where A = 2^N + 1 and B = 2^M,
159 // e.g. 6=3*2=(2+1)*2.
160 // TODO: consider lowering more cases, e.g. C = 14, -6, -14 or even 45
161 // which equals to (1+2)*16-(1+2).
162 // TrailingZeroes is used to test if the mul can be lowered to
163 // shift+add+shift.
164 unsigned TrailingZeroes = ConstValue.countr_zero();
165 if (TrailingZeroes) {
166 // Conservatively do not lower to shift+add+shift if the mul might be
167 // folded into smul or umul.
168 if (MRI.hasOneNonDBGUse(LHS) &&
169 (isSignExtended(LHS, MRI) || isZeroExtended(LHS, MRI)))
170 return false;
171 // Conservatively do not lower to shift+add+shift if the mul might be
172 // folded into madd or msub.
173 if (MRI.hasOneNonDBGUse(Dst)) {
174 MachineInstr &UseMI = *MRI.use_instr_begin(Dst);
175 unsigned UseOpc = UseMI.getOpcode();
176 if (UseOpc == TargetOpcode::G_ADD || UseOpc == TargetOpcode::G_PTR_ADD ||
177 UseOpc == TargetOpcode::G_SUB)
178 return false;
179 }
180 }
181 // Use ShiftedConstValue instead of ConstValue to support both shift+add/sub
182 // and shift+add+shift.
183 APInt ShiftedConstValue = ConstValue.ashr(TrailingZeroes);
184
185 unsigned ShiftAmt, AddSubOpc;
186 // Is the shifted value the LHS operand of the add/sub?
187 bool ShiftValUseIsLHS = true;
188 // Do we need to negate the result?
189 bool NegateResult = false;
190
191 if (ConstValue.isNonNegative()) {
192 // (mul x, 2^N + 1) => (add (shl x, N), x)
193 // (mul x, 2^N - 1) => (sub (shl x, N), x)
194 // (mul x, (2^N + 1) * 2^M) => (shl (add (shl x, N), x), M)
195 APInt SCVMinus1 = ShiftedConstValue - 1;
196 APInt CVPlus1 = ConstValue + 1;
197 if (SCVMinus1.isPowerOf2()) {
198 ShiftAmt = SCVMinus1.logBase2();
199 AddSubOpc = TargetOpcode::G_ADD;
200 } else if (CVPlus1.isPowerOf2()) {
201 ShiftAmt = CVPlus1.logBase2();
202 AddSubOpc = TargetOpcode::G_SUB;
203 } else
204 return false;
205 } else {
206 // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
207 // (mul x, -(2^N + 1)) => - (add (shl x, N), x)
208 APInt CVNegPlus1 = -ConstValue + 1;
209 APInt CVNegMinus1 = -ConstValue - 1;
210 if (CVNegPlus1.isPowerOf2()) {
211 ShiftAmt = CVNegPlus1.logBase2();
212 AddSubOpc = TargetOpcode::G_SUB;
213 ShiftValUseIsLHS = false;
214 } else if (CVNegMinus1.isPowerOf2()) {
215 ShiftAmt = CVNegMinus1.logBase2();
216 AddSubOpc = TargetOpcode::G_ADD;
217 NegateResult = true;
218 } else
219 return false;
220 }
221
222 if (NegateResult && TrailingZeroes)
223 return false;
224
225 ApplyFn = [=](MachineIRBuilder &B, Register DstReg) {
226 auto Shift = B.buildConstant(LLT::scalar(64), ShiftAmt);
227 auto ShiftedVal = B.buildShl(Ty, LHS, Shift);
228
229 Register AddSubLHS = ShiftValUseIsLHS ? ShiftedVal.getReg(0) : LHS;
230 Register AddSubRHS = ShiftValUseIsLHS ? LHS : ShiftedVal.getReg(0);
231 auto Res = B.buildInstr(AddSubOpc, {Ty}, {AddSubLHS, AddSubRHS});
232 assert(!(NegateResult && TrailingZeroes) &&
233 "NegateResult and TrailingZeroes cannot both be true for now.");
234 // Negate the result.
235 if (NegateResult) {
236 B.buildSub(DstReg, B.buildConstant(Ty, 0), Res);
237 return;
238 }
239 // Shift the result.
240 if (TrailingZeroes) {
241 B.buildShl(DstReg, Res, B.buildConstant(LLT::scalar(64), TrailingZeroes));
242 return;
243 }
244 B.buildCopy(DstReg, Res.getReg(0));
245 };
246 return true;
247}
248
249void applyAArch64MulConstCombine(
251 std::function<void(MachineIRBuilder &B, Register DstReg)> &ApplyFn) {
252 B.setInstrAndDebugLoc(MI);
253 ApplyFn(B, MI.getOperand(0).getReg());
254 MI.eraseFromParent();
255}
256
257/// Try to fold a G_MERGE_VALUES of 2 s32 sources, where the second source
258/// is a zero, into a G_ZEXT of the first.
259bool matchFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI) {
260 auto &Merge = cast<GMerge>(MI);
261 LLT SrcTy = MRI.getType(Merge.getSourceReg(0));
262 if (SrcTy != LLT::scalar(32) || Merge.getNumSources() != 2)
263 return false;
264 return mi_match(Merge.getSourceReg(1), MRI, m_SpecificICst(0));
265}
266
267void applyFoldMergeToZext(MachineInstr &MI, MachineRegisterInfo &MRI,
269 // Mutate %d(s64) = G_MERGE_VALUES %a(s32), 0(s32)
270 // ->
271 // %d(s64) = G_ZEXT %a(s32)
272 Observer.changingInstr(MI);
273 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
274 MI.removeOperand(2);
275 Observer.changedInstr(MI);
276}
277
278/// \returns True if a G_ANYEXT instruction \p MI should be mutated to a G_ZEXT
279/// instruction.
280bool matchMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI) {
281 // If this is coming from a scalar compare then we can use a G_ZEXT instead of
282 // a G_ANYEXT:
283 //
284 // %cmp:_(s32) = G_[I|F]CMP ... <-- produces 0/1.
285 // %ext:_(s64) = G_ANYEXT %cmp(s32)
286 //
287 // By doing this, we can leverage more KnownBits combines.
288 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT);
289 Register Dst = MI.getOperand(0).getReg();
290 Register Src = MI.getOperand(1).getReg();
291 return MRI.getType(Dst).isScalar() &&
292 mi_match(Src, MRI,
294 m_GFCmp(m_Pred(), m_Reg(), m_Reg())));
295}
296
297void applyMutateAnyExtToZExt(MachineInstr &MI, MachineRegisterInfo &MRI,
299 GISelChangeObserver &Observer) {
300 Observer.changingInstr(MI);
301 MI.setDesc(B.getTII().get(TargetOpcode::G_ZEXT));
302 Observer.changedInstr(MI);
303}
304
305/// Match a 128b store of zero and split it into two 64 bit stores, for
306/// size/performance reasons.
307bool matchSplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI) {
308 GStore &Store = cast<GStore>(MI);
309 if (!Store.isSimple())
310 return false;
311 LLT ValTy = MRI.getType(Store.getValueReg());
312 if (!ValTy.isVector() || ValTy.getSizeInBits() != 128)
313 return false;
314 if (Store.getMemSizeInBits() != ValTy.getSizeInBits())
315 return false; // Don't split truncating stores.
316 if (!MRI.hasOneNonDBGUse(Store.getValueReg()))
317 return false;
318 auto MaybeCst = isConstantOrConstantSplatVector(
319 *MRI.getVRegDef(Store.getValueReg()), MRI);
320 return MaybeCst && MaybeCst->isZero();
321}
322
323void applySplitStoreZero128(MachineInstr &MI, MachineRegisterInfo &MRI,
325 GISelChangeObserver &Observer) {
326 B.setInstrAndDebugLoc(MI);
327 GStore &Store = cast<GStore>(MI);
328 assert(MRI.getType(Store.getValueReg()).isVector() &&
329 "Expected a vector store value");
330 LLT NewTy = LLT::scalar(64);
331 Register PtrReg = Store.getPointerReg();
332 auto Zero = B.buildConstant(NewTy, 0);
333 auto HighPtr = B.buildPtrAdd(MRI.getType(PtrReg), PtrReg,
334 B.buildConstant(LLT::scalar(64), 8));
335 auto &MF = *MI.getMF();
336 auto *LowMMO = MF.getMachineMemOperand(&Store.getMMO(), 0, NewTy);
337 auto *HighMMO = MF.getMachineMemOperand(&Store.getMMO(), 8, NewTy);
338 B.buildStore(Zero, PtrReg, *LowMMO);
339 B.buildStore(Zero, HighPtr, *HighMMO);
340 Store.eraseFromParent();
341}
342
343bool matchOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
344 std::tuple<Register, Register, Register> &MatchInfo) {
345 const LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
346 if (!DstTy.isVector())
347 return false;
348
349 Register AO1, AO2, BVO1, BVO2;
350 if (!mi_match(MI, MRI,
351 m_GOr(m_GAnd(m_Reg(AO1), m_Reg(BVO1)),
352 m_GAnd(m_Reg(AO2), m_Reg(BVO2)))))
353 return false;
354
355 auto *BV1 = getOpcodeDef<GBuildVector>(BVO1, MRI);
356 auto *BV2 = getOpcodeDef<GBuildVector>(BVO2, MRI);
357 if (!BV1 || !BV2)
358 return false;
359
360 for (int I = 0, E = DstTy.getNumElements(); I < E; I++) {
361 auto ValAndVReg1 =
362 getIConstantVRegValWithLookThrough(BV1->getSourceReg(I), MRI);
363 auto ValAndVReg2 =
364 getIConstantVRegValWithLookThrough(BV2->getSourceReg(I), MRI);
365 if (!ValAndVReg1 || !ValAndVReg2 ||
366 ValAndVReg1->Value != ~ValAndVReg2->Value)
367 return false;
368 }
369
370 MatchInfo = {AO1, AO2, BVO1};
371 return true;
372}
373
374void applyOrToBSP(MachineInstr &MI, MachineRegisterInfo &MRI,
376 std::tuple<Register, Register, Register> &MatchInfo) {
377 B.setInstrAndDebugLoc(MI);
378 B.buildInstr(
379 AArch64::G_BSP, {MI.getOperand(0).getReg()},
380 {std::get<2>(MatchInfo), std::get<0>(MatchInfo), std::get<1>(MatchInfo)});
381 MI.eraseFromParent();
382}
383
/// Combiner implementation for the AArch64 post-legalizer combines. The bulk
/// of the match/apply logic is generated by TableGen into the included .inc
/// file; tryCombineAll dispatches over the generated rules.
class AArch64PostLegalizerCombinerImpl : public Combiner {
protected:
  // TODO: Make CombinerHelper methods const.
  mutable CombinerHelper Helper;
  const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig;
  const AArch64Subtarget &STI;

public:
  AArch64PostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
      const AArch64Subtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AArch64PostLegalizerCombiner"; }

  bool tryCombineAll(MachineInstr &I) const override;

private:
  // TableGen-generated rule members (predicates, match data, etc.).
#define GET_GICOMBINER_CLASS_MEMBERS
#include "AArch64GenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
};
408
409#define GET_GICOMBINER_IMPL
410#include "AArch64GenPostLegalizeGICombiner.inc"
411#undef GET_GICOMBINER_IMPL
412
413AArch64PostLegalizerCombinerImpl::AArch64PostLegalizerCombinerImpl(
414 MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
415 GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
416 const AArch64PostLegalizerCombinerImplRuleConfig &RuleConfig,
417 const AArch64Subtarget &STI, MachineDominatorTree *MDT,
418 const LegalizerInfo *LI)
419 : Combiner(MF, CInfo, TPC, &KB, CSEInfo),
420 Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
421 RuleConfig(RuleConfig), STI(STI),
423#include "AArch64GenPostLegalizeGICombiner.inc"
425{
426}
427
/// Machine function pass wrapping AArch64PostLegalizerCombinerImpl, plus a
/// follow-on peephole that re-splits consecutive store addressing (see
/// optimizeConsecutiveMemOpAddressing below).
class AArch64PostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AArch64PostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AArch64PostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  // True when built for an -O0-style pipeline; gates optional analyses.
  bool IsOptNone;
  AArch64PostLegalizerCombinerImplRuleConfig RuleConfig;


  // Bookkeeping for one store in a consecutive-store sequence.
  struct StoreInfo {
    GStore *St = nullptr;
    // The G_PTR_ADD that's used by the store. We keep this to cache the
    // MachineInstr def.
    GPtrAdd *Ptr = nullptr;
    // The signed offset to the Ptr instruction.
    int64_t Offset = 0;
    // Type of the stored value; consecutive stores must agree on it.
    LLT StoredType;
  };
  bool tryOptimizeConsecStores(SmallVectorImpl<StoreInfo> &Stores,
                               CSEMIRBuilder &MIB);

  bool optimizeConsecutiveMemOpAddressing(MachineFunction &MF,
                                          CSEMIRBuilder &MIB);
};
461} // end anonymous namespace
462
463void AArch64PostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
465 AU.setPreservesCFG();
469 if (!IsOptNone) {
474 }
476}
477
478AArch64PostLegalizerCombiner::AArch64PostLegalizerCombiner(bool IsOptNone)
479 : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
481
482 if (!RuleConfig.parseCommandLineOption())
483 report_fatal_error("Invalid rule identifier");
484}
485
486bool AArch64PostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
487 if (MF.getProperties().hasProperty(
488 MachineFunctionProperties::Property::FailedISel))
489 return false;
491 MachineFunctionProperties::Property::Legalized) &&
492 "Expected a legalized function?");
493 auto *TPC = &getAnalysis<TargetPassConfig>();
494 const Function &F = MF.getFunction();
495 bool EnableOpt =
496 MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);
497
499 const auto *LI = ST.getLegalizerInfo();
500
501 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
503 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
505 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
506 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig());
507
508 CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
509 /*LegalizerInfo*/ nullptr, EnableOpt, F.hasOptSize(),
510 F.hasMinSize());
511 AArch64PostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo,
512 RuleConfig, ST, MDT, LI);
513 bool Changed = Impl.combineMachineInstrs();
514
515 auto MIB = CSEMIRBuilder(MF);
516 MIB.setCSEInfo(CSEInfo);
517 Changed |= optimizeConsecutiveMemOpAddressing(MF, MIB);
518 return Changed;
519}
520
521bool AArch64PostLegalizerCombiner::tryOptimizeConsecStores(
523 if (Stores.size() <= 2)
524 return false;
525
526 // Profitabity checks:
527 int64_t BaseOffset = Stores[0].Offset;
528 unsigned NumPairsExpected = Stores.size() / 2;
529 unsigned TotalInstsExpected = NumPairsExpected + (Stores.size() % 2);
530 // Size savings will depend on whether we can fold the offset, as an
531 // immediate of an ADD.
532 auto &TLI = *MIB.getMF().getSubtarget().getTargetLowering();
533 if (!TLI.isLegalAddImmediate(BaseOffset))
534 TotalInstsExpected++;
535 int SavingsExpected = Stores.size() - TotalInstsExpected;
536 if (SavingsExpected <= 0)
537 return false;
538
539 auto &MRI = MIB.getMF().getRegInfo();
540
541 // We have a series of consecutive stores. Factor out the common base
542 // pointer and rewrite the offsets.
543 Register NewBase = Stores[0].Ptr->getReg(0);
544 for (auto &SInfo : Stores) {
545 // Compute a new pointer with the new base ptr and adjusted offset.
546 MIB.setInstrAndDebugLoc(*SInfo.St);
547 auto NewOff = MIB.buildConstant(LLT::scalar(64), SInfo.Offset - BaseOffset);
548 auto NewPtr = MIB.buildPtrAdd(MRI.getType(SInfo.St->getPointerReg()),
549 NewBase, NewOff);
550 if (MIB.getObserver())
551 MIB.getObserver()->changingInstr(*SInfo.St);
552 SInfo.St->getOperand(1).setReg(NewPtr.getReg(0));
553 if (MIB.getObserver())
554 MIB.getObserver()->changedInstr(*SInfo.St);
555 }
556 LLVM_DEBUG(dbgs() << "Split a series of " << Stores.size()
557 << " stores into a base pointer and offsets.\n");
558 return true;
559}
560
// Switch for the consecutive-store base-pointer factoring below; on by
// default, hidden from standard -help output.
static cl::opt<bool>
    EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops",
                              cl::init(true), cl::Hidden,
                              cl::desc("Enable consecutive memop optimization "
                                       "in AArch64PostLegalizerCombiner"));
566
567bool AArch64PostLegalizerCombiner::optimizeConsecutiveMemOpAddressing(
568 MachineFunction &MF, CSEMIRBuilder &MIB) {
569 // This combine needs to run after all reassociations/folds on pointer
570 // addressing have been done, specifically those that combine two G_PTR_ADDs
571 // with constant offsets into a single G_PTR_ADD with a combined offset.
572 // The goal of this optimization is to undo that combine in the case where
573 // doing so has prevented the formation of pair stores due to illegal
574 // addressing modes of STP. The reason that we do it here is because
575 // it's much easier to undo the transformation of a series consecutive
576 // mem ops, than it is to detect when doing it would be a bad idea looking
577 // at a single G_PTR_ADD in the reassociation/ptradd_immed_chain combine.
578 //
579 // An example:
580 // G_STORE %11:_(<2 x s64>), %base:_(p0) :: (store (<2 x s64>), align 1)
581 // %off1:_(s64) = G_CONSTANT i64 4128
582 // %p1:_(p0) = G_PTR_ADD %0:_, %off1:_(s64)
583 // G_STORE %11:_(<2 x s64>), %p1:_(p0) :: (store (<2 x s64>), align 1)
584 // %off2:_(s64) = G_CONSTANT i64 4144
585 // %p2:_(p0) = G_PTR_ADD %0:_, %off2:_(s64)
586 // G_STORE %11:_(<2 x s64>), %p2:_(p0) :: (store (<2 x s64>), align 1)
587 // %off3:_(s64) = G_CONSTANT i64 4160
588 // %p3:_(p0) = G_PTR_ADD %0:_, %off3:_(s64)
589 // G_STORE %11:_(<2 x s64>), %17:_(p0) :: (store (<2 x s64>), align 1)
590 bool Changed = false;
591 auto &MRI = MF.getRegInfo();
592
594 return Changed;
595
597 // If we see a load, then we keep track of any values defined by it.
598 // In the following example, STP formation will fail anyway because
599 // the latter store is using a load result that appears after the
600 // the prior store. In this situation if we factor out the offset then
601 // we increase code size for no benefit.
602 // G_STORE %v1:_(s64), %base:_(p0) :: (store (s64))
603 // %v2:_(s64) = G_LOAD %ldptr:_(p0) :: (load (s64))
604 // G_STORE %v2:_(s64), %base:_(p0) :: (store (s64))
605 SmallVector<Register> LoadValsSinceLastStore;
606
607 auto storeIsValid = [&](StoreInfo &Last, StoreInfo New) {
608 // Check if this store is consecutive to the last one.
609 if (Last.Ptr->getBaseReg() != New.Ptr->getBaseReg() ||
610 (Last.Offset + static_cast<int64_t>(Last.StoredType.getSizeInBytes()) !=
611 New.Offset) ||
612 Last.StoredType != New.StoredType)
613 return false;
614
615 // Check if this store is using a load result that appears after the
616 // last store. If so, bail out.
617 if (any_of(LoadValsSinceLastStore, [&](Register LoadVal) {
618 return New.St->getValueReg() == LoadVal;
619 }))
620 return false;
621
622 // Check if the current offset would be too large for STP.
623 // If not, then STP formation should be able to handle it, so we don't
624 // need to do anything.
625 int64_t MaxLegalOffset;
626 switch (New.StoredType.getSizeInBits()) {
627 case 32:
628 MaxLegalOffset = 252;
629 break;
630 case 64:
631 MaxLegalOffset = 504;
632 break;
633 case 128:
634 MaxLegalOffset = 1008;
635 break;
636 default:
637 llvm_unreachable("Unexpected stored type size");
638 }
639 if (New.Offset < MaxLegalOffset)
640 return false;
641
642 // If factoring it out still wouldn't help then don't bother.
643 return New.Offset - Stores[0].Offset <= MaxLegalOffset;
644 };
645
646 auto resetState = [&]() {
647 Stores.clear();
648 LoadValsSinceLastStore.clear();
649 };
650
651 for (auto &MBB : MF) {
652 // We're looking inside a single BB at a time since the memset pattern
653 // should only be in a single block.
654 resetState();
655 for (auto &MI : MBB) {
656 if (auto *St = dyn_cast<GStore>(&MI)) {
657 Register PtrBaseReg;
659 LLT StoredValTy = MRI.getType(St->getValueReg());
660 unsigned ValSize = StoredValTy.getSizeInBits();
661 if (ValSize < 32 || St->getMMO().getSizeInBits() != ValSize)
662 continue;
663
664 Register PtrReg = St->getPointerReg();
665 if (mi_match(
666 PtrReg, MRI,
667 m_OneNonDBGUse(m_GPtrAdd(m_Reg(PtrBaseReg), m_ICst(Offset))))) {
668 GPtrAdd *PtrAdd = cast<GPtrAdd>(MRI.getVRegDef(PtrReg));
669 StoreInfo New = {St, PtrAdd, Offset.getSExtValue(), StoredValTy};
670
671 if (Stores.empty()) {
672 Stores.push_back(New);
673 continue;
674 }
675
676 // Check if this store is a valid continuation of the sequence.
677 auto &Last = Stores.back();
678 if (storeIsValid(Last, New)) {
679 Stores.push_back(New);
680 LoadValsSinceLastStore.clear(); // Reset the load value tracking.
681 } else {
682 // The store isn't a valid to consider for the prior sequence,
683 // so try to optimize what we have so far and start a new sequence.
684 Changed |= tryOptimizeConsecStores(Stores, MIB);
685 resetState();
686 Stores.push_back(New);
687 }
688 }
689 } else if (auto *Ld = dyn_cast<GLoad>(&MI)) {
690 LoadValsSinceLastStore.push_back(Ld->getDstReg());
691 }
692 }
693 Changed |= tryOptimizeConsecStores(Stores, MIB);
694 resetState();
695 }
696
697 return Changed;
698}
699
700char AArch64PostLegalizerCombiner::ID = 0;
701INITIALIZE_PASS_BEGIN(AArch64PostLegalizerCombiner, DEBUG_TYPE,
702 "Combine AArch64 MachineInstrs after legalization", false,
703 false)
706INITIALIZE_PASS_END(AArch64PostLegalizerCombiner, DEBUG_TYPE,
707 "Combine AArch64 MachineInstrs after legalization", false,
708 false)
709
710namespace llvm {
712 return new AArch64PostLegalizerCombiner(IsOptNone);
713}
714} // end namespace llvm
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
static bool isZeroExtended(SDValue N, SelectionDAG &DAG)
static bool isSignExtended(SDValue N, SelectionDAG &DAG)
#define GET_GICOMBINER_CONSTRUCTOR_INITS
static cl::opt< bool > EnableConsecutiveMemOpOpt("aarch64-postlegalizer-consecutive-memops", cl::init(true), cl::Hidden, cl::desc("Enable consecutive memop optimization " "in AArch64PostLegalizerCombiner"))
#define DEBUG_TYPE
Combine AArch64 MachineInstrs after legalization
MachineBasicBlock & MBB
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This file implements a version of MachineIRBuilder which CSEs insts within a MachineBasicBlock.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
#define LLVM_DEBUG(X)
Definition: Debug.h:101
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1291
This contains common code to allow clients to notify changes to machine instr.
Provides analysis for querying information about KnownBits during GISel passes.
Declares convenience wrapper classes for interpreting MachineInstr instances as specific generic oper...
Hexagon Vector Combine
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
static StringRef getName(Value *V)
R600 Clause Merge
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
Target-Independent Code Generator Pass Configuration Options pass.
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:76
unsigned countr_zero() const
Count the number of trailing zero bits.
Definition: APInt.h:1589
unsigned logBase2() const
Definition: APInt.h:1703
APInt ashr(unsigned ShiftAmt) const
Arithmetic right-shift function.
Definition: APInt.h:805
bool isNonNegative() const
Determine if this APInt Value is non-negative (>= 0)
Definition: APInt.h:312
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:418
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:269
Defines a builder that does CSE of MachineInstructions using GISelCSEInfo.
Definition: CSEMIRBuilder.h:32
MachineInstrBuilder buildConstant(const DstOp &Res, const ConstantInt &Val) override
Build and insert Res = G_CONSTANT Val.
Combiner implementation.
Definition: Combiner.h:34
virtual bool tryCombineAll(MachineInstr &I) const =0
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:311
The actual analysis pass wrapper.
Definition: CSEInfo.h:222
Simple wrapper that does the following.
Definition: CSEInfo.h:204
The CSE Analysis object.
Definition: CSEInfo.h:69
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelKnownBitsInfoAnalysis...
Represents a G_PTR_ADD.
Represents a G_STORE.
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
Definition: LowLevelType.h:42
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
Definition: LowLevelType.h:159
constexpr bool isVector() const
Definition: LowLevelType.h:148
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
Definition: LowLevelType.h:193
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
bool hasProperty(Property P) const
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineFunctionProperties & getProperties() const
Get the function properties.
Helper class to build MachineInstr.
GISelChangeObserver * getObserver()
MachineInstrBuilder buildPtrAdd(const DstOp &Res, const SrcOp &Op0, const SrcOp &Op1, std::optional< unsigned > Flags=std::nullopt)
Build and insert Res = G_PTR_ADD Op0, Op1.
MachineFunction & getMF()
Getter for the function we currently build.
void setInstrAndDebugLoc(MachineInstr &MI)
Set the insertion point to before MI, and set the debug loc to MI's loc.
Representation of each machine instruction.
Definition: MachineInstr.h:69
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
ArrayRef< int > getShuffleMask() const
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
Target-Independent Code Generator Pass Configuration Options.
virtual const TargetLowering * getTargetLowering() const
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(int64_t RequestedValue)
Matches a constant equal to RequestedValue.
operand_type_match m_Pred()
ConstantMatch< APInt > m_ICst(APInt &Cst)
BinaryOp_match< LHS, RHS, TargetOpcode::G_OR, true > m_GOr(const LHS &L, const RHS &R)
OneNonDBGUse_match< SubPat > m_OneNonDBGUse(const SubPat &SP)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_ICMP > m_GICmp(const Pred &P, const LHS &L, const RHS &R)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
BinaryOp_match< LHS, RHS, TargetOpcode::G_PTR_ADD, false > m_GPtrAdd(const LHS &L, const RHS &R)
Or< Preds... > m_any_of(Preds &&... preds)
BinaryOp_match< LHS, RHS, TargetOpcode::G_AND, true > m_GAnd(const LHS &L, const RHS &R)
CompareOp_match< Pred, LHS, RHS, TargetOpcode::G_FCMP > m_GFCmp(const Pred &P, const LHS &L, const RHS &R)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
MachineInstr * getOpcodeDef(unsigned Opcode, Register Reg, const MachineRegisterInfo &MRI)
See if Reg is defined by an single def instruction that is Opcode.
Definition: Utils.cpp:625
void initializeAArch64PostLegalizerCombinerPass(PassRegistry &)
std::optional< APInt > isConstantOrConstantSplatVector(MachineInstr &MI, const MachineRegisterInfo &MRI)
Determines if MI defines a constant integer or a splat vector of constant integers.
Definition: Utils.cpp:1495
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
FunctionPass * createAArch64PostLegalizerCombiner(bool IsOptNone)
void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition: Utils.cpp:1140
std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition: Utils.cpp:413